Author: Amanda Everitt
Began: 8/24/2018
Finished:: 8/25/2018
Investigate the clustering of neuronal cells in particular and Tbr1+ neuronal cells without the background noise of the other cellular subtypes. First, lets look at clustering of neuronal cells and see if it as homogenous as the first tSNE makes it appear. Next lets see if this pattern is retained in cells noticeably expressing Tbr1 transcripts.
load(file=paste0(wd, "/outputs/output_02/experiment_aggregate.0.3.RData"))
neuronal_cells <- rownames(experiment.aggregate.0.3@meta.data[experiment.aggregate.0.3@meta.data$res.0.3 %in% c(0,1,2),])
experiment.aggregate <- SubsetData(experiment.aggregate.0.3, cells.use = neuronal_cells, do.clean = T)
experiment.aggregate
## An object of class seurat in project Siavash scRNA
## 14933 genes across 12269 samples.
p1a <- ggplot(experiment.aggregate@meta.data, aes(x=nUMI)) +
geom_histogram(binwidth=50, color="black", fill="white") + ggtitle("Distribution of nUMI") +
geom_vline(aes(xintercept=4000),color="red") + theme_bw()
p1b <- VlnPlot(experiment.aggregate, "nUMI", do.return = TRUE, group.by = "orig.ident") +
geom_hline(yintercept = 4000, color= "red")
p1b <- p1b + ggtitle("Distribution of nUMI by genotype") + xlab("Genotype") + ylab("count") + theme_bw() +
theme(plot.title = element_text(color="black", size=12),
axis.title.x = element_text(color="black", size=12),
axis.title.y = element_text(color="black", size=12),
legend.position = "none")
p2a <- ggplot(experiment.aggregate@meta.data, aes(x=nGene)) +
geom_histogram(binwidth=50, color="black", fill="white") + ggtitle("Distribution of nGene") +
geom_vline(aes(xintercept=500),color="red") + geom_vline(aes(xintercept=1700),color="red") + theme_bw()
p2b <- VlnPlot(experiment.aggregate, "nGene", do.return = TRUE, group.by = "orig.ident") +
geom_hline(yintercept = 1700, color= "red")
p2b <- p2b + ggtitle("Distribution of nGene by genotype") + xlab("Genotype") + ylab("count") + theme_bw() +
theme(plot.title = element_text(color="black", size=12),
axis.title.x = element_text(color="black", size=12),
axis.title.y = element_text(color="black", size=12),
legend.position = "none")
p3a <- ggplot(experiment.aggregate@meta.data, aes(x=percent.mito)) +
geom_histogram(binwidth=0.01, color="black", fill="white") + ggtitle("Distribution of mito percentage") +
geom_vline(aes(xintercept=0.3),color="red") + theme_bw()
p3b <- VlnPlot(experiment.aggregate, "percent.mito", do.return = TRUE, group.by = "orig.ident") +
geom_hline(yintercept = 0.299, color= "red")
p3b <- p3b + ggtitle("Distribution of mito % by genotype") + xlab("Genotype") + ylab("count") + theme_bw() +
theme(plot.title = element_text(color="black", size=12),
axis.title.x = element_text(color="black", size=12),
axis.title.y = element_text(color="black", size=12),
legend.position = "none")
grid.arrange(p1a,p2a, p3a, p1b, p2b, p3b, ncol=3)
experiment.aggregate <- FilterCells(object = experiment.aggregate, subset.names = "nGene",
low.thresholds = 500 , high.thresholds = 1700)
experiment.aggregate <- FilterCells(object = experiment.aggregate, subset.names = "nUMI",
low.thresholds = -Inf , high.thresholds = 4000)
experiment.aggregate
## An object of class seurat in project Siavash scRNA
## 14933 genes across 11943 samples.
counts = as.matrix(experiment.aggregate@raw.data[, colnames(experiment.aggregate@data)]) #Extract Raw Data
means <- rowMeans(as.matrix(experiment.aggregate@data))
vars <- rowVars(as.matrix(experiment.aggregate@data))
rData = data.frame(seuratVarGenes = rownames(counts) %in% experiment.aggregate.0.3@var.genes,
NumberCellsOccurIn = as.numeric(rowSums(as.matrix(experiment.aggregate@data) > 0)),
MeanExpr = as.numeric(means),
Var = as.numeric(vars),
CoefofVar = vars/means^2
)
rownames(rData) = rownames(counts)
#Remove mitochondrial, ribosomal, or psuedogenes
pt1 <- grep("^mt-|^Rps|^Rpl|^Mrps|^Mrpl|^Gm", rownames(rData), value = T)
#Remove genes that don't occur in 1% of cells (~12)
pt2 <- rownames(rData[rData$NumberCellsOccurIn < 12,])
#Remove genes with variation below minimum
pt3 <- rownames(rData[rData$Var < median(rData$Var),])
to.remove <- c(pt1, pt2, pt3)
#Update the objects
rData <- rData[!rownames(rData) %in% to.remove, ]
counts <- counts[rownames(rData),]
#Optional plotting
#hist(rData$NumberCellsOccurIn, breaks = 50)
#hist(rData$Var, breaks = 50)
load(paste0(wd, "/", out_dir, "/neuronal_object.RData"))
experiment.aggregate <- CreateSeuratObject(counts, project = "neuronal cells")
experiment.aggregate@meta.data$orig.ident = gsub(".*L5|/", "", rownames(experiment.aggregate@meta.data))
experiment.aggregate <- NormalizeData(object= experiment.aggregate, normalization.method = "LogNormalize", scale.factor = 10000)
experiment.aggregate <- ScaleData(
object = experiment.aggregate, #will populate new slot "scale data", does not change "data" slot
do.scale = TRUE,
do.center = TRUE,
vars.to.regress = c("nUMI", "nGene"))
experiment.aggregate <- RunPCA(
object = experiment.aggregate,
pc.genes = rownames(experiment.aggregate@data), #use all genes
pcs.compute = 20, do.print = F,
maxit = 500)
experiment.aggregate <- JackStraw(
object = experiment.aggregate,
num.replicate = 100,
num.pc = 20)
use.pcs = c(1:8) #Chose all PCs accounting for more than 2% of variance
experiment.aggregate <- FindClusters(
object = experiment.aggregate,
reduction.type = "pca",
dims.use = use.pcs,
resolution = seq(0.3,1.2,0.3),
print.output = FALSE,
force.recalc = TRUE,
save.SNN = TRUE
)
experiment.aggregate <- SetAllIdent(experiment.aggregate, id = "res.0.3")
experiment.aggregate <- RunTSNE( object = experiment.aggregate, reduction.use = "pca", dims.use = use.pcs, do.fast = TRUE)
experiment.aggregate <- BuildClusterTree(experiment.aggregate, pcs.use = use.pcs, do.reorder = F, reorder.numeric = F, do.plot=F)
save(experiment.aggregate, file=paste0(wd, "/", out_dir, "neuronal_object.RData"))
## $Mgst3
##
## $Foxp1
##
## $Bcl11b
cluster3 <- FindMarkers(object = experiment.aggregate, ident.1 = 3, ident.2 = c(0:2), min.pct = 0, test.use = "wilcox")
markers_all <- FindAllMarkers(object = experiment.aggregate, min.pct = 0, thresh.use = 0, only.pos = TRUE, test.use = "wilcox")
save(list = c("cluster3", "markers_all"), file=paste0(wd, "/", out_dir, "/neuronal_markers.RData"))
load(paste0(wd, "/", out_dir, "/neuronal_markers.RData"))
top10_FC <- as.data.frame(markers_all[abs(markers_all$p_val_adj) < 0.05, ] %>% group_by(cluster) %>% top_n(10, avg_logFC))
top10_cluster3 <- as.data.frame(markers_all[abs(markers_all$p_val_adj) < 0.05, ] %>% group_by(cluster) %>% top_n(10, avg_logFC))
top10_FC[top10_FC$cluster == 4,]
## p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene
## 41 0.000000e+00 1.9430618 0.790 0.159 0.000000e+00 4 Arpp21
## 42 0.000000e+00 1.5189914 0.420 0.053 0.000000e+00 4 Rbp1
## 43 0.000000e+00 1.4957318 0.382 0.029 0.000000e+00 4 Ppp1r1b
## 44 0.000000e+00 1.4244200 0.319 0.014 0.000000e+00 4 Pcp4l1
## 45 1.922197e-300 1.1634648 0.227 0.011 1.378984e-296 4 Rxrg
## 46 2.178525e-261 1.2631225 0.961 0.672 1.562874e-257 4 Pcp4
## 47 2.675738e-256 1.4546196 0.614 0.156 1.919575e-252 4 Gng7
## 48 2.470751e-165 0.8255434 0.133 0.007 1.772517e-161 4 Crabp1
## 49 8.546886e-101 0.9474987 0.368 0.109 6.131536e-97 4 Bcl11b
## 50 5.781851e-82 0.8736184 0.305 0.091 4.147900e-78 4 Carhsp1
FeaturePlot(experiment.aggregate, features.plot = c("Arpp21","Rbp1","Rxrg","Bcl11b"), cols.use = c("lightblue", "red"), nCol = 2, no.legend = F)
top10_FC[top10_FC$cluster == 5,]
## p_val avg_logFC pct.1 pct.2 p_val_adj cluster gene
## 51 0.000000e+00 3.000307 0.583 0.011 0.000000e+00 5 Xist
## 52 4.932569e-149 1.344320 0.350 0.017 3.538625e-145 5 Ogt
## 53 2.377942e-95 1.283703 0.417 0.037 1.705936e-91 5 Pnisr
## 54 7.473697e-89 2.492201 0.450 0.050 5.361630e-85 5 Meg3
## 55 6.068821e-85 4.217211 0.958 0.418 4.353772e-81 5 Malat1
## 56 3.151335e-72 1.479686 0.442 0.056 2.260767e-68 5 Zbtb20
## 57 8.053503e-61 1.337414 0.242 0.020 5.777583e-57 5 Igfbpl1
## 58 1.903641e-41 1.257782 0.408 0.078 1.365672e-37 5 Rsrp1
## 59 1.890606e-39 1.243402 0.683 0.220 1.356321e-35 5 Son
## 60 2.936256e-07 2.434219 0.208 0.078 2.106470e-03 5 Hbb-bs
FeaturePlot(experiment.aggregate, features.plot = c("Xist", "Ogt", "Meg3", "Malat1"), cols.use = c("lightblue", "red"), nCol = 2, no.legend = F)
head(cluster3[order(abs(cluster3$avg_logFC), decreasing = T), ], n=15)
## p_val avg_logFC pct.1 pct.2 p_val_adj
## Ptgds 5.521240e-232 1.3546391 0.278 0.043 3.960937e-228
## Malat1 1.192680e-162 0.8756627 0.713 0.367 8.556284e-159
## Hbb-bs 4.452546e-118 0.6895767 0.221 0.053 3.194257e-114
## Ubc 1.280024e-59 -0.5787463 0.341 0.525 9.182895e-56
## Ncs1 2.989169e-32 -0.5295051 0.202 0.338 2.144430e-28
## Calm2 6.628534e-234 -0.4919251 0.975 0.991 4.755310e-230
## Dynlt1a 9.002883e-65 0.4917497 0.174 0.053 6.458668e-61
## Cldn5 3.007658e-59 0.4773154 0.113 0.027 2.157694e-55
## Hba-a1 5.904735e-55 0.4700749 0.124 0.033 4.236057e-51
## Hist1h2al 1.823679e-64 0.4312643 0.111 0.024 1.308307e-60
## Gnb2l1 6.684370e-51 0.4239240 0.547 0.349 4.795367e-47
## Kif1a 1.781093e-59 0.4192938 0.755 0.562 1.277756e-55
## Capza2 4.875303e-24 -0.4107886 0.268 0.381 3.497542e-20
## Hba-a2 7.629526e-41 0.3903012 0.066 0.014 5.473422e-37
## Fau 5.465946e-159 0.3857561 0.992 0.954 3.921270e-155
FeaturePlot(experiment.aggregate, features.plot = c("Ptgds","Ubc", "Calm2","Cldn5"), cols.use = c("lightblue", "red"), nCol = 2, no.legend = F)
load(paste0(wd, "/outputs/output_02/experiment_aggregate.0.3.RData"))
neuronal_cells <- rownames(experiment.aggregate.0.3@meta.data[experiment.aggregate.0.3@meta.data$res.0.3 %in% c(0,1,2),])
experiment.aggregate <- SubsetData(experiment.aggregate.0.3, cells.use = neuronal_cells, do.clean = T)
experiment.aggregate
## An object of class seurat in project Siavash scRNA
## 14933 genes across 12269 samples.
a <- melt(experiment.aggregate@data["Tbr1",])
a$sample_type <- "WT"
a[grep("NULL", rownames(a)),]$sample_type <- "NULL"
a[grep("HET", rownames(a)),]$sample_type <- "HET"
p1 <- ggplot(a, aes(x=value, fill=sample_type)) +
geom_density(alpha= 0.3) +
scale_x_continuous(name="Normalized counts per cell") +
ylim(0,0.5) + ylab("Density") +
ggtitle("Density of Tbr1 expression")
tbr1.cells.keep <- rownames(a[a$value > 0, , drop=F])
p1
experiment.aggregate <- SubsetData(experiment.aggregate, cells.use = tbr1.cells.keep, do.clean = T)
experiment.aggregate
## An object of class seurat in project Siavash scRNA
## 14933 genes across 567 samples.
hist(experiment.aggregate@meta.data$nGene, breaks = 100);abline(v = c(500,1700), col="red")
experiment.aggregate <- FilterCells(object = experiment.aggregate, subset.names = "nGene",
low.thresholds = 500 , high.thresholds = 1700)
experiment.aggregate
## An object of class seurat in project Siavash scRNA
## 14933 genes across 554 samples.
hist(experiment.aggregate@meta.data$nUMI, breaks = 100);abline(v=4000, col="red")
experiment.aggregate <- FilterCells(object = experiment.aggregate, subset.names = "nUMI",
low.thresholds = -Inf , high.thresholds = 4000)
experiment.aggregate
## An object of class seurat in project Siavash scRNA
## 14933 genes across 541 samples.
counts = as.matrix(experiment.aggregate@raw.data[, colnames(experiment.aggregate@data)]) #Extract Raw Data
means <- rowMeans(as.matrix(experiment.aggregate@data))
vars <- rowVars(as.matrix(experiment.aggregate@data))
rData = data.frame(seuratVarGenes = rownames(counts) %in% experiment.aggregate.0.3@var.genes,
NumberCellsOccurIn = as.numeric(rowSums(as.matrix(experiment.aggregate@data) > 0)),
MeanExpr = as.numeric(means),
Var = as.numeric(vars),
CoefofVar = vars/means^2
)
rownames(rData) = rownames(counts)
#Remove mitochondrial, ribosomal, or psuedogenes
pt1 <- grep("^mt-|^Rps|^Rpl|^Mrps|^Mrpl|^Gm", rownames(rData), value = T)
#Remove genes that don't occur in 1% of cells (~12)
pt2 <- rownames(rData[rData$NumberCellsOccurIn < 5,])
#Remove genes with variation below minimum
pt3 <- rownames(rData[rData$Var < median(rData$Var),])
to.remove <- c(pt1, pt2, pt3)
#Update the objects
rData <- rData[!rownames(rData) %in% to.remove, ]
counts <- counts[rownames(rData),]
#Optional plotting
#hist(rData$NumberCellsOccurIn, breaks = 50)
#hist(rData$Var, breaks = 50)
load(paste0(wd, "/", out_dir, "/Tbr1_neuronal_object.RData"))
experiment.aggregate <- CreateSeuratObject(counts, project = "Tbr1+_neuronal_cells")
experiment.aggregate@meta.data$orig.ident = gsub(".*L5|/", "", rownames(experiment.aggregate@meta.data))
experiment.aggregate <- NormalizeData(object= experiment.aggregate, normalization.method = "LogNormalize", scale.factor = 10000)
experiment.aggregate <- ScaleData(
object = experiment.aggregate, #will populate new slot "scale data", does not change "data" slot
do.scale = TRUE,
do.center = TRUE,
vars.to.regress = c("nUMI", "nGene"))
experiment.aggregate <- RunPCA(
object = experiment.aggregate,
pc.genes = rownames(experiment.aggregate@data), #use all genes
pcs.compute = 12, do.print = F,
maxit = 500)
experiment.aggregate <- JackStraw(
object = experiment.aggregate,
num.replicate = 100,
num.pc = 12)
use.pcs = c(1:7) #Chose all PCs accounting for more than 5% of variance
experiment.aggregate <- FindClusters(
object = experiment.aggregate,
reduction.type = "pca",
dims.use = use.pcs,
resolution = seq(0.3,1.2,0.3),
print.output = FALSE,
force.recalc = TRUE,
save.SNN = TRUE
)
experiment.aggregate <- SetAllIdent(experiment.aggregate, id = "res.0.3")
experiment.aggregate <- RunTSNE( object = experiment.aggregate, reduction.use = "pca", dims.use = use.pcs, do.fast = TRUE)
save(experiment.aggregate, file=paste0(out_dir, "/Tbr1_neuronal_object.RData"))
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin17.7.0 (64-bit)
## Running under: macOS High Sierra 10.13.6
##
## Matrix products: default
## BLAS/LAPACK: /usr/local/Cellar/openblas/0.3.6_1/lib/libopenblasp-r0.3.6.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] gridExtra_2.3 reshape2_1.4.3
## [3] scales_1.0.0 cluster_2.1.0
## [5] dynamicTreeCut_1.63-1 SingleCellExperiment_1.6.0
## [7] SummarizedExperiment_1.14.1 DelayedArray_0.10.0
## [9] BiocParallel_1.18.1 matrixStats_0.55.0
## [11] Biobase_2.44.0 GenomicRanges_1.36.1
## [13] GenomeInfoDb_1.20.0 IRanges_2.18.3
## [15] S4Vectors_0.22.1 BiocGenerics_0.30.0
## [17] dplyr_0.8.3 Seurat_2.3.4
## [19] Matrix_1.2-17 cowplot_1.0.0
## [21] ggplot2_3.2.1 knitr_1.25
##
## loaded via a namespace (and not attached):
## [1] Rtsne_0.15 colorspace_1.4-1 class_7.3-15
## [4] modeltools_0.2-22 ggridges_0.5.1 mclust_5.4.5
## [7] htmlTable_1.13.2 XVector_0.24.0 base64enc_0.1-3
## [10] rstudioapi_0.10 proxy_0.4-23 npsurv_0.4-0
## [13] flexmix_2.3-15 bit64_0.9-7 codetools_0.2-16
## [16] splines_3.6.0 R.methodsS3_1.7.1 lsei_1.2-0
## [19] robustbase_0.93-5 zeallot_0.1.0 Formula_1.2-3
## [22] jsonlite_1.6 ica_1.0-2 kernlab_0.9-27
## [25] png_0.1-7 R.oo_1.22.0 compiler_3.6.0
## [28] httr_1.4.1 backports_1.1.5 assertthat_0.2.1
## [31] lazyeval_0.2.2 lars_1.2 acepack_1.4.1
## [34] htmltools_0.4.0 tools_3.6.0 igraph_1.2.4.1
## [37] GenomeInfoDbData_1.2.1 gtable_0.3.0 glue_1.3.1
## [40] RANN_2.6.1 Rcpp_1.0.2 vctrs_0.2.0
## [43] gdata_2.18.0 ape_5.3 nlme_3.1-141
## [46] iterators_1.0.12 fpc_2.2-3 gbRd_0.4-11
## [49] lmtest_0.9-37 xfun_0.10 stringr_1.4.0
## [52] lifecycle_0.1.0 irlba_2.3.3 gtools_3.8.1
## [55] DEoptimR_1.0-8 zlibbioc_1.30.0 MASS_7.3-51.4
## [58] zoo_1.8-6 doSNOW_1.0.18 RColorBrewer_1.1-2
## [61] yaml_2.2.0 reticulate_1.13 pbapply_1.4-2
## [64] rpart_4.1-15 segmented_1.0-0 latticeExtra_0.6-28
## [67] stringi_1.4.3 foreach_1.4.7 checkmate_1.9.4
## [70] caTools_1.17.1.2 bibtex_0.4.2 Rdpack_0.11-0
## [73] SDMTools_1.1-221.1 rlang_0.4.0 pkgconfig_2.0.3
## [76] dtw_1.21-3 prabclus_2.3-1 bitops_1.0-6
## [79] evaluate_0.14 lattice_0.20-38 ROCR_1.0-7
## [82] purrr_0.3.2 labeling_0.3 htmlwidgets_1.5.1
## [85] bit_1.1-14 tidyselect_0.2.5 plyr_1.8.4
## [88] magrittr_1.5 R6_2.4.0 snow_0.4-3
## [91] gplots_3.0.1.1 Hmisc_4.2-0 pillar_1.4.2
## [94] foreign_0.8-72 withr_2.1.2 fitdistrplus_1.0-14
## [97] mixtools_1.1.0 RCurl_1.95-4.12 survival_2.44-1.1
## [100] nnet_7.3-12 tsne_0.1-3 tibble_2.1.3
## [103] crayon_1.3.4 hdf5r_1.3.0 KernSmooth_2.23-15
## [106] rmarkdown_1.16 grid_3.6.0 data.table_1.12.4
## [109] metap_1.1 digest_0.6.21 diptest_0.75-7
## [112] tidyr_1.0.0 R.utils_2.9.0 munsell_0.5.0